####################
####################
##### Ambitiousness Quantile Analysis, replicates Figure 4, Table A4
####################
####################

## setwd("C:/Users/acw64/Dropbox/Seeking More Why Women Fail/Pol Behavior Rep Files Ambitious Women 02282020")
## setwd("/Users/sparshasaha/Dropbox/Seeking More Why Women Fail/Pol Behavior Rep Files Ambitious Women 02282020")

library(cregg)
library(ggplot2)
library(FindIt)

####################
####################
### First cleaning and setting up the quantiles using FindIt
####################
####################

### Feed in data

mydata2 = read.csv("ssi.csv")
dim(mydata2)

data <- mydata2

## create Party4 variable

data$Party2 <- factor(data$Interests.Political.Affiliation)
data$Party2 <- ifelse(data$Interests.Political.Affiliation == "Democrat" | data$Interests.Political.Affiliation == "Republican (GOP)", data$Party2, 0)
data$Party2[data$Party2 == 0] <- NA
data$Party3 <- factor(data$Party2)
data$Party4 <- ifelse(data$Party3=="5", "Republican", data$Party3)
data$Party4 <- ifelse(data$Party3=="2", "Democrat", data$Party4)
data$Party4 <- factor(data$Party4)

## 5 = republican, 2 = democrat

vars <-c("candidate_vote", "Gender", "id", "Progressive", "Agenda", "Children", "Personalistic", "resp_gender", "Party4", "election", "variable", "Q27_1", "Q29_1")
mydata <- data[,vars]
nrow(mydata)

## clean this up a bit

mydata$resp_gender2 <- factor(mydata$resp_gender)

## interactions

#mydata$GenderxProgressive <- with(mydata, interaction(Gender, Progressive), drop = TRUE)
#mydata$GenderxAgenda <- with(mydata, interaction(Gender, Agenda), drop = TRUE)
#mydata$GenderxChildren <- with(mydata, interaction(Gender, Children), drop = TRUE)
#mydata$GenderxPersonalistic <- with(mydata, interaction(Gender, Personalistic), drop = TRUE)


vars<-c('candidate_vote', "Gender", "id", "Progressive",
"Agenda", "Children", "Personalistic", "Party4", "resp_gender2", "election", "variable", "Q27_1", "Q29_1")
newdata2<-mydata[,vars]
nrow(newdata2)
head(newdata2)


baselines <- list()
baselines$Agenda <- "Very Few Changes"
baselines$Children <-"No children"
baselines$Personalistic <-"Empathetic"
baselines$Gender <-"Male"
baselines$Progressive <-"No"

## ONLY keep data in final election (3) and structure

mydata3 <- newdata2
nrow(mydata3)

newdat <- subset(mydata3, election == 3)
nrow(newdat)
head(newdat)

newdat$Q27_1 ## candidate 1 ambition
newdat$Q29_1 ## candidate 2 ambition

newdat <- newdat[order(newdat$id),]
head(newdat)
table(newdat$id)

newdat$ambition <- ifelse(newdat$variable=="candidateA", newdat$Q27_1, newdat$Q29_1)
head(newdat)
dim(newdat)

vars<-c("ambition", "Gender", "id", "Progressive",
"Agenda", "Children", "Personalistic")
newdata2 <- newdat[,vars]
nrow(newdata2)
head(newdata2)

newdata2$Parental <- newdata2$Children
newdata2$Children <- NULL

###Make sure factors are ordered, required by FindIt 
newdata2$Gender<-factor(newdata2$Gender, ordered=FALSE, levels=c("Female", "Male"))
newdata2$Progressive<-factor(newdata2$Progressive, ordered=FALSE, levels=c("No", "Yes"))
newdata2$Personalistic<-factor(newdata2$Personalistic, ordered=TRUE, levels=c("Empathetic", "Good Communicator",
"Hard-Working", "Collaborative", "Assertive", "Tough Negotiator", "Determined to Succeed"))
newdata2$Agenda<-factor(newdata2$Agenda, ordered=TRUE, levels=c("Very Few Changes", "Moderate Changes", 
"Complete Overhaul"))
newdata2$Parental<-factor(newdata2$Parental, ordered=TRUE, levels=c("No children", "1 child", "2 children", "3 children"))

set.seed(1234)
F4<- FindIt(model.treat= ambition ~ Progressive+Personalistic+Agenda,
             nway=3,
                        data = newdata2,
             type="continuous",
             treat.type="multiple",
             search.lambdas=TRUE)

## Returns coefficient estimates.
summary(F4)

## Returns predicted values for unique treatment combinations.
pred4 <- predict(F4,unique=TRUE)

head(pred4$data)
dim(pred4$data) ## 84 rows

## merge the pred4 data on ambition treatment effect with baseline data

vars<-c('candidate_vote', "Gender", "id", "Progressive",
"Agenda", "Children", "Personalistic", "Party4", "resp_gender2", "election")
newdata2<-mydata[,vars]
nrow(newdata2)
head(newdata2)
newdata2$Parental<-newdata2$Children

is.data.frame(newdata2)
pred5<-pred4$data
head(pred5)
nrow(pred5)

test<-merge(newdata2, pred5, by=c("Progressive", "Personalistic", "Agenda"), all.x=TRUE)
head(test)
nrow(test)
summary(test$Treatment.effect) ##there are no NAs
hist(test$Treatment.effect)

test$Ambition<-test$Treatment.effect
test$Treatment.effect<-NULL
head(test)


###Define Ambitiousness by quantile 
quantile(test$Ambition)
##       0%        25%        50%        75%       100% 
##-1.1054324 -0.4824296 -0.2146784  0.1318932  1.0226472 


test$Ambitiousness2<-4
test$Ambitiousness2[test$Ambition< 0.1318932]<-3
test$Ambitiousness2[test$Ambition< -0.2146784]<-2
test$Ambitiousness2[test$Ambition< -0.4824296]<-1
summary(test$Ambitiousness2)
table(test$Ambitiousness2) ##these are pretty even, good. 
test$Ambitiousness2<-as.factor(test$Ambitiousness2)
test$Ambitiousness<-test$Ambitiousness2
test$Ambitiousness2<-NULL
test$GenderxAmbitiousness <- with(test, interaction(Gender, Ambitiousness), drop = TRUE)

test$Parental <-factor(test$Parental, levels=c("No children", "1 child", "2 children", "3 children"))

####################
####################
### AMCEs (DV is vote choice)
####################
####################

## pdf(file = "/Users/sparshasaha/Desktop/ssi_baseline.pdf")

pl0<-amce(na.omit(test), candidate_vote ~ Ambitiousness + Parental, id = ~id)

plot(pl0, feature_headers = TRUE,
header_fmt = "(%s)", size = 1, xlab = "Effect on Pr(Candidate Selected)", ylab = "",
xlim = NULL, vline = 0, vline_color = "gray",
theme = ggplot2::theme_bw())
##more ambitious candidates are preferred, but little diff between 3 and 4

## dev.off()

####################
## Now include interaction with candidate gender 
####################

## pdf(file = "/Users/sparshasaha/Desktop/ssi_interaction.pdf")

amces <- cj(na.omit(test), candidate_vote ~ Ambitiousness + Parental, id = ~id, estimate = "amce", by = ~Gender)
diff_amces <- cj(na.omit(test), candidate_vote ~ Ambitiousness + Parental, id = ~id, estimate = "amce_diff", by = ~Gender)
plot(rbind(amces, diff_amces), xlab = "Effect on Pr(Candidate Selected)") + ggplot2::facet_wrap(~BY, ncol = 3L)

## dev.off()

## you can see that women are on a more level playing field, more preferred, as ambitiousness levels go up 

##################################
#### Plotting interactions with clustered SEs, this replicates Figure 4 in text
##################################

### Let's plot the marginal effects. R not great 
### options for packages with cluster se / that go into ggplot. 
### so do it this way

##cluster SE function##
## to cluster by id ##

clx <- 
function(fm, dfcw, cluster){
         library(sandwich);library(lmtest)
         M <- length(unique(cluster))   
         N <- length(cluster)           
         K <- fm$rank                        
         dfc <- (M/(M-1))*((N-1)/(N-K))  
         uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
         vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)*dfcw
	return(list(coeftest= coeftest(fm, vcovCL), vcov=vcovCL))
 }



# meplot()
meplot <- function(model,var1,var2,int,vcov,ci=.95,
                   xlab=var2,ylab=paste("Marginal Effect of",var1),
                   main="Marginal Effect Plot",
                   me_lty=1,me_lwd=1,me_col="black",
                   ci_lty=1,ci_lwd=.5,ci_col="black",
                   yint_lty=2,yint_lwd=1,yint_col="black"){
  require(ggplot2)
  alpha <- 1-ci
  z <- qnorm(1-alpha/2)
  beta.hat <- coef(model)
  cov <- vcov
  z0 <- seq(min(model.frame(model)[,var2],na.rm=T),max(model.frame(model)[,var2],na.rm=T),length.out=1000)
  dy.dx <- beta.hat[var1] + beta.hat[int]*z0
  se.dy.dx <- sqrt(cov[var1,var1] + z0^2*cov[nrow(cov),ncol(cov)] + 2*z0*cov[var1,ncol(cov)])
  upr <- dy.dx + z*se.dy.dx
  lwr <- dy.dx - z*se.dy.dx
  ggplot(data=NULL,aes(x=z0, y=dy.dx)) +
    labs(x=xlab,y=ylab,title=main) +
    geom_line(aes(z0, dy.dx),size = me_lwd, 
              linetype = me_lty, 
              color = me_col) +
    geom_line(aes(z0, lwr), size = ci_lwd, 
              linetype = ci_lty, 
              color = ci_col) +
    geom_line(aes(z0, upr), size = ci_lwd, 
              linetype = ci_lty, 
              color = ci_col) +
    geom_hline(yintercept=0,linetype=yint_lty,
               size=yint_lwd,
               color=yint_col)
}

test$Female<-0
test$Female[test$Gender=="Female"]<-1
summary(test$Female)

fm.2<-lm(candidate_vote ~ Female + Ambition + Female:Ambition, data=test)
summary(fm.2) 
nobs(fm.2)
C1<- clx(fm.2, 1, test$id)
std.err<-sqrt(diag(C1$vcov))
r.est<-cbind(estimate=fm.2$coefficients, std.err, 
       pvalue=round(2*(1-pnorm(abs(fm.2$coefficients/std.err))),5), 
       lower=fm.2$coefficients-1.96*std.err, 
       upper=fm.2$coefficients+1.96*std.err)
       
r.est

## Table A4

library(stargazer)
stargazer(fm.2, title="Results", align=TRUE)
### need to update SEs in stargazer manually in latex 

## pdf(file = "/Users/sparshasaha/Desktop/ssi_marginal.pdf")

## Figure 4

p2<-meplot(model=fm.2,var1="Female",var2="Ambition",int="Female:Ambition",vcov=C1$vcov, ylab="Probability of Winning", 
xlab="Ambitiousness", main="") + 
   theme_classic() + theme(legend.position="none") + theme(text = element_text(size=18)) 
p2

## dev.off()